### Fig 1A
library(haven)
library(tidyverse)
library(weights)
library(labelled)
library(ggpubr)




#setwd("/Users/alicebrocheux/Desktop/U of R/Spring 2025/PSC 512 - Surveys/Survey Trends Project/GSS_stata")
#gss <- haven::read_dta("GSS_stata/gss7224_r1.dta")
rm(list=ls())
gss <- read_rds("clean-GSS-vars-w-weights.RDS")%>%
  subset(is.na(mode) | mode==1)


###################### Variable names and meaning ########################
#### Opinion of family income relative to American families = finrela ####
#### Satisfaction with financial situation = satfin                   ####
##########################################################################

# select vars of interest and compute weighted mean for finrela and satfin 
gss1 <- gss %>% 
  dplyr::select(year, id, wtssps, satfin, finalter, finrela, rincblls, class) %>%
  remove_labels() %>%
  mutate(satfin = as.numeric(recode(satfin, 
                                    "1" = "3",
                                    "2" = "2",
                                    "3" = "1")))%>%
  group_by(year) %>%
  filter(is.na(finrela) == F & is.na(satfin) == F) %>% 
  dplyr::mutate(wtavg_finrela = weighted.mean(finrela, wtssps, na.rm=T), 
                lwr_finrela = wtavg_finrela-1.96*sd(finrela*wtssps, na.rm=T)/sqrt(n()), 
                upr_finrela = wtavg_finrela+1.96*sd(finrela*wtssps, na.rm=T)/sqrt(n()),
                wtavg_satfin = weighted.mean(satfin, wtssps, na.rm=T), 
                lwr_satfin = wtavg_satfin-1.96*sd(satfin*wtssps, na.rm=T)/sqrt(n()), 
                upr_satfin = wtavg_satfin+1.96*sd(satfin*wtssps, na.rm=T)/sqrt(n()))

gss_finrela <- gss1 %>% select(year, wtavg_finrela, lwr_finrela, upr_finrela) 
gss_finrela <- gss_finrela[!duplicated(gss_finrela), ]

gss_satfin <- gss1 %>% select(year, wtavg_satfin, lwr_satfin, upr_satfin) 
gss_satfin <- gss_satfin[!duplicated(gss_satfin), ]

# compute weighted percentages for satfin

# N by year finrela; N by year for satfin is in n_vars_satfin

n_vars_finrela <- gss1 %>% group_by(year) %>% 
  reframe(n_finrela = sum(!is.na(finrela)))

#############################################################
########################### Plots ###########################
#############################################################


finrela <- gss1 %>%
  select(finrela) %>%
  drop_na()

## Plot weighted average finrela 
a<- ggplot(gss_finrela, aes(x=year, y=wtavg_finrela)) +
  geom_point() +
  geom_line(linetype="dashed") +
  scale_y_continuous(limits = c(1,5),
                     labels = c("Far Below Average", "Below Average", "Average",
                                "Above Average", "Far Above Average"))+
  theme_bw()+
  labs(y = "Category", x = "", 
       title = "A", 
       subtitle = "Compared with American families in general, would you say 
your family income is far below average, below average, 
average, above average, or far above average? N=70,428 (GSS)
Mean and 95% Confidence Intervals") +  
  geom_errorbar(aes(ymin = lwr_finrela, ymax = upr_finrela), linewidth=0.4) 
a
ggsave("PNAS/1A-ftf.png",width=6, height=6)
#geom_smooth(method = "lm", linewidth = 0.5)

satfin <- gss1 %>%
  select(satfin) %>%
  drop_na()

## Plot weighted avg satfin
b<- ggplot(gss_satfin, aes(x=year, y=wtavg_satfin)) +
  geom_point() +
  geom_line(linetype="dashed") +
  theme_bw()+
  scale_y_continuous(limits = c(1,3), breaks=c(1,2,3),
                     labels = c("Not satisfied at all",
                                "More or less satisfied",
                                "Pretty well satisfied"))+
  labs(y = "Category", x = "", color="Response",
       title = "B", 
       subtitle = "Would you say that you are pretty well satisfied with your
present financial situation, more or less satisfied, or 
not satisfied at all? N=70,428 (GSS)
Mean and 95% Confidence Intervals") +  
  geom_errorbar(aes(ymin = lwr_satfin, ymax = upr_satfin), linewidth=0.4)
b
ggsave("PNAS/1B-ftf.png", width=6, height=6)


####### t tests
# need need mean, Std. dev, N for: 2018 and 2024. And t-test between those with relevant stats.

o18 <- gss1 %>%
  subset(year==2018) %>%
  select(satfin, wtssps) %>%
  mutate(satfin = wtssps*satfin)%>%
  drop_na()

#n=2072

o24 <- gss1 %>%
  subset(year==2024)%>%
  select(satfin, wtssps) %>%
  mutate(satfin = wtssps*satfin)%>%
  drop_na()

#n=2072

##diff means
t.test(o18$satfin, o24$satfin)
sd(o18$satfin)
sd(o24$satfin)


health_df <- gss %>%
  mutate(health = as.numeric(recode(health, 
                                    "1" = "4",
                                    "2" = "3",
                                    "3" = "2",
                                    "4" = "1"))) %>%
  mutate(health_fac = factor(recode(health, 
                                    "4" = "Excellent",
                                    "3" = "Good",
                                    "2" = "Fair",
                                    "1" = "Poor"),
                             levels = c("Excellent",
                                        "Good",
                                        "Fair",
                                        "Poor")))%>%
  mutate(happy = as.numeric(recode(happy, 
                                   "1" = "3",
                                   "2" = "2",
                                   "3" = "1"))) %>%
  mutate(happy_fac = factor(recode(happy, 
                                   "3" = "Very happy",
                                   "2" = "Pretty happy",
                                   "1" = "Not too happy"),
                            levels = c("Not too happy",
                                       "Pretty happy",
                                       "Very happy")))

health_df1 <- health_df %>%  
  group_by(year) %>%
  summarise(happy_w_av_yr = weighted.mean(happy, wtssps, na.rm=TRUE),
            sd_happy = sd(happy, na.rm=TRUE),
            se_happy = sd_happy / sqrt(n()),
            health_w_av_yr = weighted.mean(health, wtssps, na.rm=TRUE),
            sd_health = sd(health, na.rm=TRUE),
            se_health = sd_health / sqrt(n()),
            health_fac=health_fac) %>% unique()


c<- ggplot(health_df1, aes(x=year, y=health_w_av_yr)) + geom_point() +
  geom_line(linetype="dashed") +
  theme_bw() +
  labs(x="", y="Category",title="C",
       subtitle="Would you say your own health, in general, 
is excellent, good, fair, or poor? N=58,448 (GSS)
Mean and 95% Confidence Intervals") +
  geom_errorbar(aes(ymin = (health_w_av_yr -1.96*se_health), 
                    ymax = (health_w_av_yr + 1.96*se_health)), width = 0.4) +
  scale_y_continuous(limits = c(1,4),
                     labels = c("Poor", "Fair", "Good", "Excellent"))
c
ggsave("PNAS/1C-ftf.png", width=6, height=6)


d<- ggplot(health_df1, aes(x=year, y=happy_w_av_yr)) + geom_point() +
  geom_line(linetype="dashed") +
  theme_bw() +
  labs(x="", y="Category",title="D",
       subtitle="Taken all together, how would you say things are these days:
would you say that you are very happy, pretty happy,
or not too happy? N=70,869 (GSS)
Mean and 95% Confidence Intervals") +
  geom_errorbar(aes(ymin = (happy_w_av_yr -1.96*se_happy), 
                    ymax = (happy_w_av_yr + 1.96*se_happy)), width = 0.4) +
  scale_y_continuous(limits = c(1,3), breaks=c(1,2,3),
                     labels = c("Not too happy",
                                "Pretty happy",
                                "Very happy"))
d
ggsave("PNAS/1D-ftf.png", width=6, height=6)

### make fig
png(file = "PNAS/figure-1-panel-ftf.png",
    height = 8.5,
    width = 14,
    units = "in",       # Use inches to match pdf dimensions
    res = 300)          # Set resolution (DPI) for high-quality output

print(ggarrange(a,b,c,d,
                nrow = 2, ncol = 2, common.legend = T, legend = "bottom"))  

dev.off()

o72 <- health_df %>%
  subset(year==1972) %>%
  select(year, health, wtssps) %>%
  mutate(health=wtssps*health)%>%
  drop_na()


o00 <- health_df %>%
  subset(year==2000) %>%
  select(year, health, wtssps) %>%
  mutate(health=wtssps*health)%>%  drop_na()


o18 <- health_df %>%
  subset(year==2018) %>%
  select(year, health, wtssps) %>%
  mutate(health=wtssps*health)%>%  drop_na()


o24 <- health_df %>%
  subset(year==2024) %>%
  select(year, health, wtssps) %>%
  mutate(health=wtssps*health)%>%  drop_na()

mean(o72$health)
mean(o00$health)
mean(o18$health)
mean(o24$health)

sd(o72$health)
sd(o00$health)
sd(o18$health)
sd(o24$health)

t.test(o18$health, o24$health)
t.test(o00$health, o24$health)


############## happiness t-tests
#For happiness: need mean, Std. dev, N for: 2018, and 2024. 
#And t-test between 2018 and 2024, with relevant stats.

o18 <- health_df %>%
  subset(year==2018) %>%
  select(year, happy,wtssps) %>%
  mutate(happy=wtssps*happy)%>%
  drop_na()


o24 <- health_df %>%
  subset(year==2024) %>%
  select(year, happy,wtssps) %>%
  mutate(happy=wtssps*happy)%>%
  drop_na()

sd(o18$happy)
sd(o24$happy)
t.test(o18$happy, o24$happy)
